from math import log

import numpy as np
import pandas as pd

def recompute_columns(df):
    """Compute hits, p, H and typicality for each target within each set.

    Rows are grouped by the ``set`` and ``target`` columns. The columns
    below are then written, replacing any existing column of the same name:

    hits: number of rows (data points) in the row's group
    p: mean rating of the group; a mean of exactly 1.0 is replaced by
       .99999 and a mean of exactly 0.0 by .00001 so the logs stay finite
    H: entropy as a function of p, -(p*log(p) + (1-p)*log(1-p)),
       using the natural logarithm
    typicality: how typical is this data point, relative to the target's
                mean rating? log(p) if rating is 1, log(1-p) if rating
                is 0. Rows with any other rating keep their existing
                typicality (NaN when the column did not exist).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``set``, ``target`` and ``rating``.
        The frame itself is not modified.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with the same index and row order, carrying the
        ``hits``, ``p``, ``H`` and ``typicality`` columns.

    Notes
    -----
    Ratings are assumed to be 0 or 1 (TODO confirm against the data file);
    for other values the group mean may fall outside (0, 1) and the
    logarithms then yield NaN or infinite values.

    """
    # Work on a copy so the caller's frame is never altered behind its back.
    result = df.copy()
    ratings_by_group = result.groupby(['set', 'target'])['rating']

    # transform() broadcasts each group's statistic back onto its own rows,
    # which replaces the explicit per-group loop and the final concat.
    result['hits'] = ratings_by_group.transform('size')

    p = ratings_by_group.transform('mean')
    # Nudge the degenerate means off 0 and 1; log(0) is undefined.
    p = p.mask(p == 1.0, .99999).mask(p == 0.0, .00001)
    result['p'] = p

    log_p = np.log(p)
    log_not_p = np.log(1 - p)
    result['H'] = -((p * log_p) + ((1 - p) * log_not_p))

    is_one = result['rating'] == 1
    is_zero = result['rating'] == 0
    if 'typicality' in result.columns:
        typicality = result['typicality']
    else:
        typicality = pd.Series(np.nan, index=result.index)
    # Only rows rated exactly 1 or 0 are overwritten.
    typicality = typicality.mask(is_one, log_p).mask(is_zero, log_not_p)
    result['typicality'] = typicality

    return result

# Load the raw data from the working directory.
d = pd.read_csv('numbergame_data.csv')
# The function defined in this file is recompute_columns; the previously
# called recompute_data does not exist and raised NameError. Whatever the
# call returns is kept under its own name, so `d` stays the frame as loaded.
recomputed = recompute_columns(d)